In [1]:
#install.packages("dplyr",dependencies = TRUE)
#install.packages("hash",dependencies = TRUE)
#install.packages("tidyverse",dependencies = TRUE)
#install.packages("ggiraph",dependencies = TRUE)
#install.packages("plotly",dependencies = TRUE)
#install.packages("ggplot2",dependencies=TRUE)
#install.packages("RColorBrewer",dependencies=TRUE)
#install.packages("colorRamps",dependencies=TRUE)
#install.packages("gapminder",dependencies=TRUE)
#install.packages("lubridate")
install.packages("gganimate",dependencies=TRUE)
Installing package into 'C:/Users/laxma/Documents/R/win-library/3.6'
(as 'lib' is unspecified)
package 'gganimate' successfully unpacked and MD5 sums checked

The downloaded binary packages are in
	C:\Users\laxma\AppData\Local\Temp\Rtmp6HQkP2\downloaded_packages
In [1]:
library(dplyr)
library(hash)
library(tidyverse)
library(plotly)
library(ggplot2)
library(RColorBrewer)
library(colorRamps)
library(gapminder)
library(gganimate)
library(ggthemes)
Attaching package: 'dplyr'

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union

hash-2.2.6.1 provided by Decision Patterns

-- Attaching packages --------------------------------------- tidyverse 1.2.1 --
v ggplot2 3.2.1     v readr   1.3.1
v tibble  2.1.3     v purrr   0.3.2
v tidyr   1.0.0     v stringr 1.4.0
v ggplot2 3.2.1     v forcats 0.4.0
-- Conflicts ------------------------------------------ tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag()    masks stats::lag()

Attaching package: 'plotly'

The following object is masked from 'package:ggplot2':

    last_plot

The following object is masked from 'package:stats':

    filter

The following object is masked from 'package:graphics':

    layout

Loading cleaned dataset

In [2]:
# Read the three serialized objects this notebook works with.
# NOTE(review): the two *Hash objects are presumably code -> name lookups
# (judging by their names only) -- confirm against the script that saved them.
df              <- readRDS(file = "CleanedData.Rda")
partnerNameHash <- readRDS(file = "PartnerName.Rda")
stateNameHash   <- readRDS(file = "StateName.Rda")
In [3]:
# Bucket each traveller's age into one of four labelled groups.
ageGroup <- c("0-18", "19-36", "37-54", "55 and above")

# Bins are closed on the right so they agree with the labels:
#   [0,18] -> "0-18", (18,36] -> "19-36", (36,54] -> "37-54", (54,Inf] -> "55 and above".
# With left-closed bins (right = FALSE) the boundary ages 18, 36 and 54 fall
# into the next group up, contradicting the labels. include.lowest = TRUE keeps
# an age of exactly 0 in the first bin instead of turning it into NA.
df$AgeGroup <- cut(
  df$Age,
  breaks = c(0, 18, 36, 54, Inf),
  labels = ageGroup,
  right = TRUE,
  include.lowest = TRUE
)
In [4]:
# Cross-tabulate gender (rows) against partner airline code (columns).
table(df[["Gender"]], df[["PartnerCode"]])
        
           AA   AS   B6   DL   EV   F9   FL   HA   MQ   OO   OU   US   VX   WN
  Female  314  189  198  891  643   93  133   10  273  707  534  564   57 1230
  Male    234  132  154  670  522   65  114    3  225  531  387  400   51  958

Theme for plots

In [5]:
# Build a bar chart showing the mean of one column for each value of another.
#
# Args:
#   X1:     x-axis column, given as a string (evaluated by aes_string()).
#   y1:     column whose mean is drawn as the bar height, given as a string.
#   xlabel: x-axis label.
#   ylabel: y-axis label.
#   title:  plot title.
#   yRange: passed to coord_cartesian(ylim = ), which zooms the y axis to this
#           range without removing any rows from the summary.
#   data:   data frame to plot. Defaults to the notebook-level `df`, so
#           existing calls that omit it behave as before.
#
# Returns:
#   A ggplot object.
generateScoreGraph <- function(X1, y1, xlabel, ylabel, title, yRange, data = df) {
  # `fun.y` is kept because the session log shows ggplot2 3.2.1; newer ggplot2
  # releases rename this argument to `fun`.
  ggplot(data, aes_string(x = X1, y = y1)) +
    stat_summary(fun.y = "mean", geom = "bar") +
    xlab(xlabel) +
    ylab(ylabel) +
    ggtitle(title) +
    coord_cartesian(ylim = yRange) +
    theme_classic() +
    theme(
      axis.title.x = element_text(size = 12),
      axis.title.y = element_text(size = 10),
      plot.title = element_text(size = 15, hjust = 0.5),
      panel.grid.major = element_line(color = "#e6e6e6", linetype = 1)
    )
}


# Shared look applied to the plots below: the classic theme with a centred
# title, fixed axis-title sizes and light grey major grid lines.
plotTheme <- theme_classic() +
  theme(
    plot.title = element_text(size = 15, hjust = 0.5),
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 10),
    panel.grid.major = element_line(color = "#e6e6e6", linetype = 1)
  )

Average recommendation score

Box plot of Traveller class and Likelihood to recommend Score

In [6]:
# Recommendation score distribution per traveller class, one filled box each.
classPlot <- ggplot(df, aes(x = Class, y = LikelihoodRecommendScore)) +
  geom_boxplot(aes(fill = Class)) +
  labs(x = "Traveller Class", y = "Likelihood to recommend Score") +
  plotTheme
# ggtitle("Box plot of recommendation score by traveller class")

# Number of rows in each traveller class.
table(df$Class)
# ggplotly(classPlot)
Business      Eco Eco Plus 
     787     8416     1079 

Box plot of Gender and Likelihood to recommend Score

In [7]:
# Recommendation score distribution per gender, one filled box each.
genderPlot <- ggplot(df, aes(x = Gender, y = LikelihoodRecommendScore)) +
  geom_boxplot(aes(fill = Gender)) +
  labs(x = "Gender", y = "Likelihood to recommend Score") +
  plotTheme
# ggtitle("Box plot of recommendation score by gender")

# Number of rows for each gender.
table(df$Gender)
# ggplotly(genderPlot)
Female   Male 
  5836   4446 

Box plot of Travel type and Likelihood to recommend Score

In [8]:
# Recommendation score distribution per type of travel, one filled box each.
traveltypePlot <- ggplot(df, aes(x = TypeOfTravel, y = LikelihoodRecommendScore)) +
  geom_boxplot(aes(fill = TypeOfTravel)) +
  labs(x = "Traveller Type", y = "Likelihood to recommend Score") +
  plotTheme
# ggtitle("Box plot of recommendation score by Traveller Type")

# Number of rows for each type of travel.
table(df$TypeOfTravel)
# ggplotly(traveltypePlot)
Business travel Mileage tickets Personal Travel 
           6319             855            3108 

Box plot of Airline Status and Likelihood to recommend Score

In [9]:
# Recommendation score distribution per airline status, one filled box each.
airlineStatusPlot <- ggplot(df, aes(x = AirlineStatus, y = LikelihoodRecommendScore)) +
  geom_boxplot(aes(fill = AirlineStatus)) +
  labs(x = "Airline Status", y = "Likelihood to recommend Score") +
  plotTheme
# ggtitle("Box plot of recommendation score by Airline Status")

# Number of rows for each airline status.
table(df$AirlineStatus)
# ggplotly(airlineStatusPlot)
    Blue     Gold Platinum   Silver 
    6947      904      345     2086 
In [10]:
# Arrange the four box plots in a 2 x 2 interactive grid. Legends are removed
# from every panel before combining.
p1 <- genderPlot + theme(legend.position = "none")
p2 <- traveltypePlot + theme(legend.position = "none")
p3 <- airlineStatusPlot + theme(legend.position = "none")

# Only the last panel carries a title.
# NOTE(review): presumably this acts as the heading of the whole grid once
# subplot() merges the panel layouts -- confirm in the rendered output.
p4 <- classPlot +
  theme(legend.position = "none") +
  ggtitle("Box plot of Likelihood score w.r.t gender, class, travel type and airline status")

subplot(p1, p2, p3, p4, nrows = 2, margin = 0.05)

Average recommendation score by age group

In [11]:
# Base plot of recommendation score against age group; the geometry is added
# separately below so the same axes, labels and theme serve both views.
ageGroupPlot <- ggplot(df, aes(AgeGroup, LikelihoodRecommendScore)) +
  # The x axis is AgeGroup, so it is labelled as such.
  xlab("Age Group") +
  ylab("Likelihood to recommend Score") +
  ggtitle("Distribution of Likelihood Recommendation Score by age group") +
  plotTheme

p1 <- ageGroupPlot + geom_boxplot(aes(fill = AgeGroup))
p2 <- ageGroupPlot + geom_violin(aes(fill = AgeGroup))
#table(df$TypeOfTravel)

# Box plot and violin plot side by side.
subplot(p1, p2)

Average recommendation score by Partner airlines

In [12]:
# Base plot of recommendation score against partner airline code, without a
# legend; the geometry is added separately below.
scoreByPartner <- ggplot(df, aes(PartnerCode, LikelihoodRecommendScore)) +
  xlab("Partner Airlines") +
  ylab("Likelihood to recommend Score") +
  ggtitle("Distribution of Likelihood Recommendation Score by Partner Airlines") +
  plotTheme +
  theme(legend.position = "none")

p1 <- scoreByPartner + geom_violin(aes(fill = PartnerCode))
p2 <- scoreByPartner + geom_boxplot(aes(fill = PartnerCode))
#scoreByPartner

# Box plot first, violin plot second.
subplot(p2, p1)
#ggplotly(scoreByPartner)

Number of reviews

Displaying total reviews of partner airlines by gender

In [13]:
# Count rows for every partner airline / gender pair, in long form.
dfT <- setNames(
  data.frame(table(df$PartnerCode, df$Gender)),
  c("PartnerCode", "Gender", "Count")
)

# Side-by-side bars per partner airline, one bar per gender.
# Note: this reassigns genderPlot, replacing the box plot stored under the
# same name earlier in the notebook.
genderPlot <- ggplot(dfT, aes(x = PartnerCode, y = Count, group = Gender)) +
  geom_col(aes(fill = Gender), position = "dodge", show.legend = FALSE) +
  labs(x = "Partner Airlines", y = "Gender Count") +
  ggtitle("Distribution of Gender by Partner Airlines, Class, Travel Type, Gender and Airline Status") +
  plotTheme +
  theme(plot.title = element_text(size = 12, hjust = 0.5))

ggplotly(genderPlot, tooltip = c("text", "x", "y"), dynamicTicks = TRUE)

# ggplotly(genderPlot,tooltip=c("text","x","y"),dynamicTicks = TRUE)
In [14]:
# Lollipop chart of the counts in dfT: a grey stem from zero up to each count,
# topped by a point coloured by gender.
ggplotly(
  ggplot(dfT, aes(x = PartnerCode, y = Count)) +
    geom_segment(
      aes(x = PartnerCode, xend = PartnerCode, y = 0, yend = Count),
      color = "grey"
    ) +
    geom_point(aes(color = Gender), size = 3) +
    plotTheme +
    # Remove the vertical grid lines, panel border and x ticks.
    theme(
      axis.ticks.x = element_blank(),
      panel.border = element_blank(),
      panel.grid.major.x = element_blank()
    )
)
In [15]:
# Long-form count tables: one row per (category level, gender) pair.
countColumns <- function(first) c(first, "Gender", "Count")

# Traveller class x gender
dfClass <- setNames(data.frame(table(df$Class, df$Gender)), countColumns("Class"))
#dfClass

# Airline status x gender
dfStatus <- setNames(
  data.frame(table(df$AirlineStatus, df$Gender)),
  countColumns("AirlineStatus")
)
#dfStatus

# Age group x gender
dfAge <- setNames(data.frame(table(df$AgeGroup, df$Gender)), countColumns("AgeGroup"))
#dfAge

# Type of travel x gender. The first column is named "dfType" (same as the
# data frame), which is the name the plotting code refers to.
dfType <- setNames(data.frame(table(df$TypeOfTravel, df$Gender)), countColumns("dfType"))
In [16]:
# Grouped bar charts of row counts per gender, one chart per category.
#
# Count is plotted exactly as tabulated. Sorting the Count column on its own
# would reorder the values without reordering the category/gender rows, so each
# bar would show a count that belongs to a different row.
#
# Each axis label names the variable actually plotted in that panel.
# NOTE(review): plotly::subplot() omits axis titles unless titleX/titleY are
# set to TRUE, so these labels show only when a panel is drawn on its own.
p1 <- ggplot(dfClass, aes(x = Class, y = Count, group = Gender)) +
  geom_col(aes(fill = Gender), show.legend = FALSE, position = "dodge") +
  xlab("Class") + ylab("Count") +
  plotTheme + theme(legend.position = "none")

p2 <- ggplot(dfStatus, aes(x = AirlineStatus, y = Count, group = Gender)) +
  geom_col(aes(fill = Gender), show.legend = FALSE, position = "dodge") +
  xlab("Airline Status") + ylab("Count") +
  plotTheme + theme(legend.position = "none")

p3 <- ggplot(dfAge, aes(x = AgeGroup, y = Count, group = Gender)) +
  geom_col(aes(fill = Gender), show.legend = FALSE, position = "dodge") +
  xlab("Age Group") + ylab("Count") +
  plotTheme + theme(legend.position = "none")

# The travel-type column of dfType is itself named "dfType".
p4 <- ggplot(dfType, aes(x = dfType, y = Count, group = Gender)) +
  geom_col(aes(fill = Gender), show.legend = FALSE, position = "dodge") +
  xlab("Travel Type") + ylab("Count") +
  plotTheme + theme(legend.position = "none")


# Partner-airline chart first, then the 2 x 2 grid of the four panels above.
ggplotly(genderPlot)
subplot(p1, p2, p3, p4, nrows = 2, margin = 0.05)

Number of reviews by age group

In [17]:
# Count rows for every partner airline / age group pair, in long form.
dfT <- data.frame(table(df$PartnerCode, df$AgeGroup))
colnames(dfT) <- c("PartnerCode", "AgeGroup", "Count")

# Side-by-side bars per partner airline, one bar per age group.
# The variable keeps the name genderPlot because the notebook draws it again
# under that name further down.
genderPlot <- ggplot(dfT, aes(x = PartnerCode, y = Count, group = AgeGroup)) +
  geom_col(show.legend = FALSE, position = "dodge") +
  aes(fill = AgeGroup) +
  xlab("Partner Airlines") +
  ylab("Age group Count") +
  ggtitle("Distribution of Age Group by Partner Airlines, Class, Travel Type, Gender and Airline Status") +
  plotTheme +
  theme(plot.title = element_text(size = 12, hjust = 0.5)) +
  # The bars are coloured through the fill aesthetic, so the Brewer palette
  # must be a fill scale, and the palette must be passed by name. A colour
  # scale given "Blues" as its first positional argument never applies it.
  scale_fill_brewer(palette = "Blues")

ggplotly(genderPlot)

#ggplotly(genderPlot,tooltip=c("text","x","y"),dynamicTicks = TRUE)
In [18]:
# Grouped bar charts of row counts per age group, one chart per category.
# Each long-form count table is built immediately before the panel that uses it.
# Every panel is zoomed to the same 0-3000 y range.
# NOTE(review): coord_cartesian() zooms rather than rescales, so any count
# above 3000 would run off the top of its panel -- confirm that is intended.

# Traveller class x age group
dfClass <- setNames(
  data.frame(table(df$Class, df$AgeGroup)),
  c("Class", "AgeGroup", "Count")
)
#dfClass
p1 <- ggplot(dfClass, aes(x = Class, y = Count, group = AgeGroup)) +
  geom_col(aes(fill = AgeGroup), position = "dodge", show.legend = FALSE) +
  plotTheme +
  theme(legend.position = "none") +
  coord_cartesian(ylim = c(0, 3000))

# Airline status x age group
dfStatus <- setNames(
  data.frame(table(df$AirlineStatus, df$AgeGroup)),
  c("AirlineStatus", "AgeGroup", "Count")
)
#dfStatus
p2 <- ggplot(dfStatus, aes(x = AirlineStatus, y = Count, group = AgeGroup)) +
  geom_col(aes(fill = AgeGroup), position = "dodge", show.legend = FALSE) +
  plotTheme +
  theme(legend.position = "none") +
  coord_cartesian(ylim = c(0, 3000))

# Gender x age group
dfAge <- setNames(
  data.frame(table(df$Gender, df$AgeGroup)),
  c("Gender", "AgeGroup", "Count")
)
#dfAge
p3 <- ggplot(dfAge, aes(x = Gender, y = Count, group = AgeGroup)) +
  geom_col(aes(fill = AgeGroup), position = "dodge", show.legend = FALSE) +
  plotTheme +
  theme(legend.position = "none") +
  coord_cartesian(ylim = c(0, 3000))

# Type of travel x age group
dfType <- setNames(
  data.frame(table(df$TypeOfTravel, df$AgeGroup)),
  c("TypeOfTravel", "AgeGroup", "Count")
)
p4 <- ggplot(dfType, aes(x = TypeOfTravel, y = Count, group = AgeGroup)) +
  geom_col(aes(fill = AgeGroup), position = "dodge", show.legend = FALSE) +
  plotTheme +
  theme(legend.position = "none") +
  coord_cartesian(ylim = c(0, 3000))

# Partner-airline chart first, then the 2 x 2 grid of the four panels above.
ggplotly(genderPlot)
subplot(p1, p2, p3, p4, nrows = 2, margin = 0.05)
In [ ]:

In [ ]: